# Session setup: clear the workspace, attach packages, and make the script's
# own folder the working directory so the relative paths used below
# ('Data/...', 'Output/...', 'Tables/...') resolve from there.
rm(list=ls())
library(tidyverse); library(dplyr)
# NOTE(review): the path comes from rstudioapi's active document context, so
# this presumably only works when the script is run from RStudio - confirm.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) 

# ---- Validation error rates -------------------------------------------------
# For every file under Data/Prediction whose path matches 'img', and for each
# image model tag ('ff', 'vgg'), compare each predictor's race label with the
# reference label race.mt and record, per race category r:
#   fp - share of rows with race.mt != r that the predictor labels as r
#   fn - share of rows with race.mt == r that the predictor does not label r
#   er - share of rows where the predictor disagrees with race.mt
#        (does not depend on r; repeated on every race row of a predictor)
#   rn - number of rows with race.mt == r;  rp - rn as a share of all rows
# The result is written to Output/val_error.csv and read back into `error`.
dfname <- list.files(path = 'Data/Prediction', pattern = 'img', recursive = TRUE)
if (length(dfname) == 0) {
  stop("No files matching 'img' found under Data/Prediction", call. = FALSE)
}

racecat    <- c('bla', 'whi', 'his', 'asi', 'oth')
predictors <- c('s', 'gs', 'im', 'hybrid', 'lstm', 'lstm_nc', 'lstm_wk', 'fbisg')

# Collect one small data frame per (file, model, predictor, race) and bind
# once at the end instead of re-copying a growing data frame on every pass.
rows <- list()
for (d in dfname){
  # Read each file once; both image-model passes start from the same object.
  raw <- readRDS(paste0('Data/Prediction/', d))
  for (i in c('ff', 'vgg')){
    print(d)
    # Columns whose name starts or ends with the model tag get the tag
    # replaced by 'im', so the code below can refer to race.im for either model.
    preds <- raw %>%
      rename_at(vars(matches(paste0(i, '$|^', i))), ~ gsub(i, 'im', .x)) %>%
      drop_na(race.mt, race.s, race.gs, race.fbisg, race.lstm_wk, race.im, race.hybrid) %>%
      distinct(full_name,
               place_fips, state,
               race.mt, race.s, race.gs, race.fbisg, race.lstm, race.lstm_wk, race.lstm_nc, race.im, race.hybrid)

    if (nrow(preds) == 0){
      stop('No complete rows left in ', d, ' for image model ', i, call. = FALSE)
    }

    n     <- nrow(preds)
    truth <- preds$race.mt   # never NA here: race.mt is part of drop_na()

    for (p in predictors){
      guess <- preds[[paste0('race.', p)]]

      # total error
      # race.lstm and race.lstm_nc are not covered by drop_na(), so `guess`
      # may contain NA. As before, such rows are counted as errors here,
      # whereas they turn the affected fp / fn values into NA.
      wrong <- truth != guess
      er    <- sum(wrong | is.na(wrong)) / n

      for (r in racecat){
        is_r <- truth == r
        rn   <- sum(is_r)
        # false positive
        fp <- sum(!is_r & guess == r) / sum(!is_r)
        # false negative
        fn <- sum(is_r & guess != r) / rn

        rows[[length(rows) + 1L]] <- data.frame(df = d, img = i, race = r, rp = rn / n, rn = rn,
                                                pred = p, fp = fp, fn = fn, er = er,
                                                stringsAsFactors = FALSE)
      }
    }
  }
}
error <- bind_rows(rows)

write.csv(error, 'Output/val_error.csv', na = '', row.names = FALSE)

error <- read.csv('Output/val_error.csv')
# Build, print and export the error-rate table for one prediction file and
# image model.
#
# Arguments:
#   dftype  - value of the `df` column to select (a prediction file name).
#   img     - value of the `img` column to select (e.g. 'ff').
#   outfile - passed to stargazer's `out` argument (file to write).
#   errors  - validation-error table with columns df, img, race, rp, rn, pred,
#             fp, fn, er; defaults to the script-level `error` object.
#
# Output layout (11 rows when all five races are present):
#   row 1      overall error rate (race label blank)
#   then, for Asian, Black, Hispanic, White, Other in that order:
#     a 'False Negative' row carrying the label 'Race (share%)'
#     a 'False Positive' row with a blank label
# Columns: BSO (s), BISG (gs), fBISG (fbisg), LSTM (lstm_wk), Image (im),
# Hybrid (hybrid).
#
# Returns whatever stargazer::stargazer() returns.
creattb <- function(dftype, img, outfile, errors = error){
  needed  <- c('df', 'img', 'race', 'rp', 'rn', 'pred', 'fp', 'fn', 'er')
  absent  <- setdiff(needed, names(errors))
  if (length(absent) > 0){
    stop('Error table is missing column(s): ', paste(absent, collapse = ', '), call. = FALSE)
  }

  keep <- errors$df == dftype & errors$img == img
  if (!any(keep, na.rm = TRUE)){
    stop("No rows for df = '", dftype, "' and img = '", img, "'", call. = FALSE)
  }
  # Select by name so the table does not depend on the CSV's column order.
  sel <- errors[keep, c('race', 'rp', 'rn', 'pred', 'fp', 'fn', 'er')]

  # rn summed over the races of a single predictor is the total row count.
  total_n <- sum(sel$rn[sel$pred == 's'])

  # Display label and row order for each race code; codes not listed here
  # are labelled 'Other'.
  race_labels <- c(asi = 'Asian', bla = 'Black', his = 'Hispanic', whi = 'White', oth = 'Other')
  error_order <- c('Overall Error Rate', 'False Negative', 'False Positive')

  # Long form: one row per (race, predictor, error type). `er` is repeated on
  # every race row of a predictor, so the rows of one race ('asi') are enough
  # for the overall error rate.
  long <- bind_rows(
    transmute(sel, race, rp, pred, est = fp, error = 'False Positive'),
    transmute(sel, race, rp, pred, est = fn, error = 'False Negative'),
    transmute(sel[sel$race == 'asi', ], race, rp, pred, est = er, error = 'Overall Error Rate')
  )

  temp <- long %>%
    spread(key = pred, value = est) %>%
    arrange(match(race, names(race_labels)), match(error, error_order)) %>%
    mutate(label = unname(race_labels[as.character(race)]),
           label = ifelse(is.na(label), 'Other', label),
           # Only the False Negative row of each race carries the race label.
           race  = ifelse(error == 'False Negative',
                          paste0(label, ' (', round(rp * 100), '%)'),
                          '')) %>%
    select(race, error,
           BSO = s, BISG = gs, fBISG = fbisg, LSTM = lstm_wk, Image = im, Hybrid = hybrid)

  print(temp, digits = 3)
  stargazer::stargazer(temp, summary = FALSE, digits = 3, rownames = FALSE,
                       notes = paste0('Total sample size is ', as.character(total_n), '.'),
                       out = outfile)
}

# Table 2
creattb(
  dftype  = 'img5_weighted_opencv_cov2f_nofw_level.rds',
  img     = 'ff',
  outfile = 'Tables/Table 2.tex'
)

# In-Text Analysis
# Same prediction file as Table 2: drop rows with a missing value in any of
# the listed race columns, then keep one row per distinct combination of the
# name, place, state and race columns.
d <- distinct(
  drop_na(
    readRDS('Data/Prediction/img5_weighted_opencv_cov2f_nofw_level.rds'),
    race.mt, race.s, race.gs, race.fbisg, race.lstm_wk, race.ff, race.hybrid
  ),
  full_name,
  place_fips, state,
  race.mt, race.s, race.gs, race.fbisg, race.lstm_wk, race.ff, race.hybrid
)

# Cross-tabulation of race.mt (rows) against race.s (columns), with margins.
addmargins(table(true = d[['race.mt']], pred = d[['race.s']], useNA = 'always'))
# NOTE(review): hand-typed ratio, presumably counts read off the table
# above - confirm against the printed output.
649/736

# Cross-tabulation of race.mt (rows) against race.ff (columns), with margins.
addmargins(table(true = d[['race.mt']], pred = d[['race.ff']], useNA = 'always'))
# NOTE(review): hand-typed ratio, presumably counts read off the table
# above - confirm against the printed output.
403/485

# NOTE(review): ratio of two hand-typed rates; their source is not visible
# in this script - confirm.
0.909/0.142
